Introduction

This assignment constitute in two different problems:

You may show or hide the code chunks by clicking on the Code buttons on the right of the document.

Bar charts

Data extraction and manipulation

The first step to display the bar chart is to extract and group by DATE and CUSTOMERCLASS the different data related to the Electricity consumption among the Elec- Residential and Elec- Commercial total consumption:

pge_20_q1_elec <- read_csv("PGE_2020_Q1_ElectricUsageByZip.csv")
pge_20_q2_elec <- read_csv("PGE_2020_Q2_ElectricUsageByZip.csv")
pge_19_q1_elec <- read_csv("PGE_2019_Q1_ElectricUsageByZip.csv")
pge_19_q2_elec <- read_csv("PGE_2019_Q2_ElectricUsageByZip.csv")
pge_19_q3_elec <- read_csv("PGE_2019_Q3_ElectricUsageByZip.csv")
pge_19_q4_elec <- read_csv("PGE_2019_Q4_ElectricUsageByZip.csv")
pge_18_q1_elec <- read_csv("PGE_2018_Q1_ElectricUsageByZip.csv")
pge_18_q2_elec <- read_csv("PGE_2018_Q2_ElectricUsageByZip.csv")
pge_18_q3_elec <- read_csv("PGE_2018_Q3_ElectricUsageByZip.csv")
pge_18_q4_elec <- read_csv("PGE_2018_Q4_ElectricUsageByZip.csv")
pge_17_q1_elec <- read_csv("PGE_2017_Q1_ElectricUsageByZip.csv")
pge_17_q2_elec <- read_csv("PGE_2017_Q2_ElectricUsageByZip.csv")
pge_17_q3_elec <- read_csv("PGE_2017_Q3_ElectricUsageByZip.csv")
pge_17_q4_elec <- read_csv("PGE_2017_Q4_ElectricUsageByZip.csv")

pge_elec_20 <- rbind(pge_20_q1_elec,
                  pge_20_q2_elec,
                  pge_19_q1_elec,
                  pge_19_q2_elec,
                  pge_19_q3_elec,
                  pge_19_q4_elec,
                  pge_18_q1_elec,
                  pge_18_q2_elec,
                  pge_18_q3_elec,
                  pge_18_q4_elec,
                  pge_17_q1_elec,
                  pge_17_q2_elec,
                  pge_17_q3_elec,
                  pge_17_q4_elec)

pge_elec_20 <- rbind(pge_20_q1_elec,
                  pge_20_q2_elec)
pge_elec_20_inter <- 
  pge_elec_20 %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Elec- Residential",
        "Elec- Commercial"
      )
  ) %>%
  select(
    !c(COMBINED, AVERAGEKWH)
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    TOTALKWH =
      sum(
        TOTALKWH,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
    )

pge_elec_19 <- rbind(pge_19_q1_elec,
                  pge_19_q2_elec,
                  pge_19_q3_elec,
                  pge_19_q4_elec)
pge_elec_19_inter <- 
  pge_elec_19 %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Elec- Residential",
        "Elec- Commercial"
      )
  ) %>%
  select(
    !c(COMBINED, AVERAGEKWH)
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    TOTALKWH =
      sum(
        TOTALKWH,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  )


pge_elec_18 <- rbind(pge_18_q1_elec,
                  pge_18_q2_elec,
                  pge_18_q3_elec,
                  pge_18_q4_elec)
pge_elec_18_inter <- 
  pge_elec_18 %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Elec- Residential",
        "Elec- Commercial"
      )
  ) %>%
  select(
    !c(COMBINED, AVERAGEKWH)
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    TOTALKWH =
      sum(
        TOTALKWH,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  ) 

pge_elec_17 <- rbind(pge_17_q1_elec,
                  pge_17_q2_elec,
                  pge_17_q3_elec,
                  pge_17_q4_elec)
pge_elec_17_inter <- 
  pge_elec_17 %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Elec- Residential",
        "Elec- Commercial"
      )
  ) %>%
  select(
    !c(COMBINED, AVERAGEKWH)
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    TOTALKWH =
      sum(
        TOTALKWH,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  )

pge_elec_17_inter[17,4]<-pge_elec_17_inter[17,4]/2
pge_elec_17_inter[18,4]<-pge_elec_17_inter[18,4]/2

pge_elec_inter <- rbind(pge_elec_17_inter,
                       pge_elec_18_inter,
                       pge_elec_19_inter,
                       pge_elec_20_inter)
pge_elec_inter$DATE <- paste0(pge_elec_inter$YEAR,"/",pge_elec_inter$MONTH,"/01")

pge_elec <-
  pge_elec_inter %>%
  select(
    !c(YEAR,MONTH)
  ) %>%
  mutate(
    TOTALkBTU=
      TOTALKWH*3.412
  )%>%
  group_by(DATE)

pge_elec_final <-
  select(
    pge_elec,
    c(DATE,CUSTOMERCLASS,TOTALkBTU)
  )
saveRDS(pge_elec_final, "pge_elec_final.rds")

Then, I extracted and grouped by DATE and CUSTOMERCLASS the different data related to the Gas consumption among the Gas- Residential and Gas- Commercial total consumption:

pge_20_q1_gas <- read_csv("PGE_2020_Q1_GasUsageByZip.csv")
pge_20_q2_gas <- read_csv("PGE_2020_Q2_GasUsageByZip.csv")
pge_19_q1_gas <- read_csv("PGE_2019_Q1_GasUsageByZip.csv")
pge_19_q2_gas <- read_csv("PGE_2019_Q2_GasUsageByZip.csv")
pge_19_q3_gas <- read_csv("PGE_2019_Q3_GasUsageByZip.csv")
pge_19_q4_gas <- read_csv("PGE_2019_Q4_GasUsageByZip.csv")
pge_18_q1_gas <- read_csv("PGE_2018_Q1_GasUsageByZip.csv")
pge_18_q2_gas <- read_csv("PGE_2018_Q2_GasUsageByZip.csv")
pge_18_q3_gas <- read_csv("PGE_2018_Q3_GasUsageByZip.csv")
pge_18_q4_gas <- read_csv("PGE_2018_Q4_GasUsageByZip.csv")
pge_17_q1_gas <- read_csv("PGE_2017_Q1_GasUsageByZip.csv")
pge_17_q2_gas <- read_csv("PGE_2017_Q2_GasUsageByZip.csv")
pge_17_q3_gas <- read_csv("PGE_2017_Q3_GasUsageByZip.csv")
pge_17_q4_gas <- read_csv("PGE_2017_Q4_GasUsageByZip.csv")

pge_gas_20 <- rbind(pge_20_q1_gas,
                  pge_20_q2_gas,
                  pge_19_q1_gas,
                  pge_19_q2_gas,
                  pge_19_q3_gas,
                  pge_19_q4_gas,
                  pge_18_q1_gas,
                  pge_18_q2_gas,
                  pge_18_q3_gas,
                  pge_18_q4_gas,
                  pge_17_q1_gas,
                  pge_17_q2_gas,
                  pge_17_q3_gas,
                  pge_17_q4_gas)

pge_gas_20 <- rbind(pge_20_q1_gas,
                  pge_20_q2_gas)
pge_gas_20_inter <- 
  pge_gas_20 %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Gas- Residential",
        "Gas- Commercial"
      )
  ) %>%
  select(
    !c(COMBINED, AVERAGETHM)
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    TOTALTHM =
      sum(
        TOTALTHM,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  )

pge_gas_19 <- rbind(pge_19_q1_gas,
                  pge_19_q2_gas,
                  pge_19_q3_gas,
                  pge_19_q4_gas)
pge_gas_19_inter <- 
  pge_gas_19 %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Gas- Residential",
        "Gas- Commercial"
      )
  ) %>%
  select(
    !c(COMBINED, AVERAGETHM)
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    TOTALTHM =
      sum(
        TOTALTHM,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  )

pge_gas_18 <- rbind(pge_18_q1_gas,
                  pge_18_q2_gas,
                  pge_18_q3_gas,
                  pge_18_q4_gas)
pge_gas_18_inter <- 
  pge_gas_18 %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Gas- Residential",
        "Gas- Commercial"
      )
  ) %>%
  select(
    !c(COMBINED, AVERAGETHM)
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    TOTALTHM =
      sum(
        TOTALTHM,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  ) 

pge_gas_17 <- rbind(pge_17_q1_gas,
                  pge_17_q2_gas,
                  pge_17_q3_gas,
                  pge_17_q4_gas)
pge_gas_17_inter <- 
  pge_gas_17 %>%
  filter(
    CUSTOMERCLASS %in%
      c(
        "Gas- Residential",
        "Gas- Commercial"
      )
  ) %>%
  select(
    !c(COMBINED,AVERAGETHM)
  ) %>%
  group_by(YEAR, MONTH, CUSTOMERCLASS) %>%
  summarize(
    TOTALTHM =
      sum(
        TOTALTHM,
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      )
  ) 

pge_gas_17_inter[17,4]<-pge_gas_17_inter[17,4]/2
pge_gas_17_inter[18,4]<-pge_gas_17_inter[18,4]/2

pge_gas_inter <- rbind(pge_gas_17_inter,
                       pge_gas_18_inter,
                       pge_gas_19_inter,
                       pge_gas_20_inter)
pge_gas_inter$DATE <- paste0(pge_gas_inter$YEAR,"/",pge_gas_inter$MONTH,"/01")

pge_gas <-
  pge_gas_inter %>%
  select(
    !c(YEAR,MONTH)
  ) %>%
  mutate(
    TOTALkBTU=
      TOTALTHM*100
  )%>%
  group_by(DATE)

pge_gas_final <-
  select(
    pge_gas,
    c(DATE,CUSTOMERCLASS,TOTALkBTU,YEAR)
  )
saveRDS(pge_gas_final, "pge_gas_final.rds")

Two important manipulations can be noticed. First, the grouping by DATE needed the creation of an extra-column, and the conversion factors from kWh and Thm to kBtu are different (respectively 3.412 and 100). Furthermore, the data from September 2017 seemed corrupt (approximately twice higher than for the other months). This is why I added the following lines you can find in the previous code chunks:

pge_elec_17_inter[17,4]<-pge_elec_17_inter[17,4]/2
pge_elec_17_inter[18,4]<-pge_elec_17_inter[18,4]/2
pge_gas_17_inter[17,4]<-pge_gas_17_inter[17,4]/2
pge_gas_17_inter[18,4]<-pge_gas_17_inter[18,4]/2

Bar charts plotting

I can now plot bar charts showing Monthly total kBTUs of Residential and Commercial Electricity and Gas Consumption for the Bay Area from 2017 to the latest available month using plot_ly() and as.Date() to define the DATE values as dates:

pge_gas_final$DATE <- as.Date(pge_gas_final$DATE, format="%Y/%m/%d")
pge_elec_final$DATE <- as.Date(pge_elec_final$DATE, format="%Y/%m/%d")

plot_ly() %>% 
  add_trace(
    data = pge_elec_final %>% filter(CUSTOMERCLASS == "Elec- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "Elec- Residential"
  ) %>% 
  add_trace(
    data = pge_elec_final %>% filter(CUSTOMERCLASS == "Elec- Commercial"),
    x = ~DATE%>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "Elec- Commercial"
  ) %>% 
  add_trace(
    data = pge_gas_final %>% filter(CUSTOMERCLASS == "Gas- Residential"),
    x = ~DATE%>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "Gas- Residential"
  ) %>% 
  add_trace(
    data = pge_gas_final %>% filter(CUSTOMERCLASS == "Gas- Commercial"),
    x = ~DATE%>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "Gas- Commercial"
  ) %>% 
  layout(
    xaxis = list(
      title = "Year - Month",
      fixedrange = T
    ),
    yaxis = list(
      title = "Consumption in kBtu",
      fixedrange = T
    ),
    barmode = "stack",
    legend = list(title = list(text = "Electricity Type")),
    title = 'Monthly total kBTUs of Residential and Commercial Electricity \n and Gas Consumption for the Bay Area',
    colorway = c('darkgreen','darkseagreen4','darkgoldenrod4','darkgoldenrod1')
  ) %>% 
  config(displayModeBar = F)

Interpretation

In order to analyze more precisely the potential differences in energy consumption due to the COVID-19 pandemic, I find it pertinent to separate the data related to the different type of consumption (Elec- Residential, Elec- Commercial, Gas- Residential and Gas- Commercial) for the months of March, April, May and June of years 2017, 2018, 2019 and 2020.

First, I plot the bar chart comparing the Elec- Residential consumption for these periods of time:

pge_compare_elec_res_17 <-
  pge_elec_final %>%
  filter(CUSTOMERCLASS=="Elec- Residential")%>%
  filter(DATE=="2017-03-01"|DATE=="2017-04-01"|DATE=="2017-05-01"|DATE=="2017-06-01")
pge_compare_elec_res_18 <-
  pge_elec_final %>%
  filter(CUSTOMERCLASS=="Elec- Residential")%>%
  filter(DATE=="2018-03-01"|DATE=="2018-04-01"|DATE=="2018-05-01"|DATE=="2018-06-01")
pge_compare_elec_res_19 <-
  pge_elec_final %>%
  filter(CUSTOMERCLASS=="Elec- Residential")%>%
  filter(DATE=="2019-03-01"|DATE=="2019-04-01"|DATE=="2019-05-01"|DATE=="2019-06-01")
pge_compare_elec_res_20 <-
  pge_elec_final %>%
  filter(CUSTOMERCLASS=="Elec- Residential")%>%
  filter(DATE=="2020-03-01"|DATE=="2020-04-01"|DATE=="2020-05-01"|DATE=="2020-06-01")

plot_ly() %>% 
  add_trace(
    data = pge_compare_elec_res_17 %>% filter(CUSTOMERCLASS == "Elec- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2017"
  ) %>%
  add_trace(
    data = pge_compare_elec_res_18 %>% filter(CUSTOMERCLASS == "Elec- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2018"
  ) %>%
  add_trace(
    data = pge_compare_elec_res_19 %>% filter(CUSTOMERCLASS == "Elec- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2019"
  ) %>%
  add_trace(
    data = pge_compare_elec_res_20 %>% filter(CUSTOMERCLASS == "Elec- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2020"
  ) %>%
  layout(
    xaxis = list(
      title = "Year - Month",
      fixedrange = T
    ),
    yaxis = list(
      title = "Consumption in kBtu",
      fixedrange = T
    ),
    barmode = "stack",
    title = 'Comparison in Residential Electricity Consumption',
    colorway = c('darkolivegreen1','darkolivegreen3','darkolivegreen4','darkolivegreen')
  ) %>% 
  config(displayModeBar = F)

For the months of March, the Residential Electricity consumption remains relatively constant. In fact, COVID crisis started at the end of the month, which could explain a so little difference in relative consumption.

However, we can notice an increasing difference starting in April. The Residential Electricity consumption increased by 0.37 BkBtu, 0.65 BkBtu and 0.54 BkBtu respectively compared to the months of April 2017, 2018 and 2019.

This increasing gets more important for the months of May (+0.37, +1.01 and +1.25 BkBtu), and also for the months of June - except compared to June 2017 when the consumption was higher (+0.75 BkBtu compared to 2018 and +0.41 BkBtu compared to 2019).

The high differences in the months of April and May can be linked to the imposed lockdown. More time is then spent at home, especially for teleworking. As the main tool for teleworking is electronic (phone, computer etc…), the electricity consumption in residences is directly impacted. The relief in regulations can explain the relative difference decreasing between the months of May and June. Because some offices reopened, the total number of people working from home may have significantly decrease, therefore reducing the global increase in Residential Electricity consumption compared to the previous month.

Now, I plot the bar chart comparing the Gas- Residential consumption:

pge_compare_gas_res_17 <-
  pge_gas_final %>%
  filter(CUSTOMERCLASS=="Gas- Residential")%>%
  filter(DATE=="2017-03-01"|DATE=="2017-04-01"|DATE=="2017-05-01"|DATE=="2017-06-01")
pge_compare_gas_res_18 <-
  pge_gas_final %>%
  filter(CUSTOMERCLASS=="Gas- Residential")%>%
  filter(DATE=="2018-03-01"|DATE=="2018-04-01"|DATE=="2018-05-01"|DATE=="2018-06-01")
pge_compare_gas_res_19 <-
  pge_gas_final %>%
  filter(CUSTOMERCLASS=="Gas- Residential")%>%
  filter(DATE=="2019-03-01"|DATE=="2019-04-01"|DATE=="2019-05-01"|DATE=="2019-06-01")
pge_compare_gas_res_20 <-
  pge_gas_final %>%
  filter(CUSTOMERCLASS=="Gas- Residential")%>%
  filter(DATE=="2020-03-01"|DATE=="2020-04-01"|DATE=="2020-05-01"|DATE=="2020-06-01")

plot_ly() %>% 
  add_trace(
    data = pge_compare_gas_res_17 %>% filter(CUSTOMERCLASS == "Gas- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2017"
  ) %>%
  add_trace(
    data = pge_compare_gas_res_18 %>% filter(CUSTOMERCLASS == "Gas- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2018"
  ) %>%
  add_trace(
    data = pge_compare_gas_res_19 %>% filter(CUSTOMERCLASS == "Gas- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2019"
  ) %>%
  add_trace(
    data = pge_compare_gas_res_20 %>% filter(CUSTOMERCLASS == "Gas- Residential"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2020"
  ) %>%
  layout(
    xaxis = list(
      title = "Year - Month",
      fixedrange = T
    ),
    yaxis = list(
      title = "Consumption in kBtu",
      fixedrange = T
    ),
    barmode = "stack",
    title = 'Comparison in Residential Gas Consumption',
    colorway = c('gold','darkgoldenrod1','darkgoldenrod3','darkgoldenrod4')
  ) %>% 
  config(displayModeBar = F)

When it comes to Residential Gas consumption, it remains relatively constant for every months except the months of April.

For the months of March, I assume that it is for the same reason as for the Residential Electricity consumption. As for the months of May and June, we can state that most of the utility gas consumption is directly related to heating, which is not necessary in Spring in the Bay Area because the outdoor air temperature is usually quite high, especially on day time. Therefore, even if people worked at home, consuming gas to heat the house was not needed.

However, heating might be needed in April. As people were still stuck at home due to COVID, it could explain such an increase in Residential Gas consumption (+1.898, +2.117 and +3.285 BkBtu compared to the months of April in 2017, 2018 and 2019).

Similarly, I can plot the bar chart comparing the Elec- Commercial consumption:

pge_compare_elec_com_17 <-
  pge_elec_final %>%
  filter(CUSTOMERCLASS=="Elec- Commercial")%>%
  filter(DATE=="2017-03-01"|DATE=="2017-04-01"|DATE=="2017-05-01"|DATE=="2017-06-01")
pge_compare_elec_com_18 <-
  pge_elec_final %>%
  filter(CUSTOMERCLASS=="Elec- Commercial")%>%
  filter(DATE=="2018-03-01"|DATE=="2018-04-01"|DATE=="2018-05-01"|DATE=="2018-06-01")
pge_compare_elec_com_19 <-
  pge_elec_final %>%
  filter(CUSTOMERCLASS=="Elec- Commercial")%>%
  filter(DATE=="2019-03-01"|DATE=="2019-04-01"|DATE=="2019-05-01"|DATE=="2019-06-01")
pge_compare_elec_com_20 <-
  pge_elec_final %>%
  filter(CUSTOMERCLASS=="Elec- Commercial")%>%
  filter(DATE=="2020-03-01"|DATE=="2020-04-01"|DATE=="2020-05-01"|DATE=="2020-06-01")

plot_ly() %>% 
  add_trace(
    data = pge_compare_elec_com_17 %>% filter(CUSTOMERCLASS == "Elec- Commercial"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2017"
  ) %>%
  add_trace(
    data = pge_compare_elec_com_18 %>% filter(CUSTOMERCLASS == "Elec- Commercial"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2018"
  ) %>%
  add_trace(
    data = pge_compare_elec_com_19 %>% filter(CUSTOMERCLASS == "Elec- Commercial"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2019"
  ) %>%
  add_trace(
    data = pge_compare_elec_com_20 %>% filter(CUSTOMERCLASS == "Elec- Commercial"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2020"
  ) %>%
  layout(
    xaxis = list(
      title = "Year - Month",
      fixedrange = T
    ),
    yaxis = list(
      title = "Consumption in kBtu",
      fixedrange = T
    ),
    barmode = "stack",
    title = 'Comparison in Commercial Electricity Consumption',
    colorway = c('darkseagreen1','darkseagreen','darkolivegreen3','darkseagreen4')
  ) %>% 
  config(displayModeBar = F)

For the Commercial Electricity consumption, we can observe a phenomena counteracting the variations observed for the Residential Electricity consumption: it is slightly lower in March 2020 (-0.705, -0.759 and -0.479 BkBtu compared to 2017, 2018 and 2019), then much higher in April (-1.325, -0.945 and -1.597 BkBtu). This gap is reduced in May (-1.237, -0.748 and -1.005 BkBtu), and even more in June (-1.108, -.0785 and -0.950 BkBtu).

At first sight, I would state that the global consumption stays even throughout the years despite the COVID pandemic in 2020. Let’s plot the bar chart comparing the Gas- Commercial consumption to see if it also counteracts the variations observed for the Residential Gas consumption:

pge_compare_gas_com_17 <-
  pge_gas_final %>%
  filter(CUSTOMERCLASS=="Gas- Commercial")%>%
  filter(DATE=="2017-03-01"|DATE=="2017-04-01"|DATE=="2017-05-01"|DATE=="2017-06-01")
pge_compare_gas_com_18 <-
  pge_gas_final %>%
  filter(CUSTOMERCLASS=="Gas- Commercial")%>%
  filter(DATE=="2018-03-01"|DATE=="2018-04-01"|DATE=="2018-05-01"|DATE=="2018-06-01")
pge_compare_gas_com_19 <-
  pge_gas_final %>%
  filter(CUSTOMERCLASS=="Gas- Commercial")%>%
  filter(DATE=="2019-03-01"|DATE=="2019-04-01"|DATE=="2019-05-01"|DATE=="2019-06-01")
pge_compare_gas_com_20 <-
  pge_gas_final %>%
  filter(CUSTOMERCLASS=="Gas- Commercial")%>%
  filter(DATE=="2020-03-01"|DATE=="2020-04-01"|DATE=="2020-05-01"|DATE=="2020-06-01")

plot_ly() %>% 
  add_trace(
    data = pge_compare_gas_com_17 %>% filter(CUSTOMERCLASS == "Gas- Commercial"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2017"
  ) %>%
  add_trace(
    data = pge_compare_gas_com_18 %>% filter(CUSTOMERCLASS == "Gas- Commercial"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2018"
  ) %>%
  add_trace(
    data = pge_compare_gas_com_19 %>% filter(CUSTOMERCLASS == "Gas- Commercial"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2019"
  ) %>%
  add_trace(
    data = pge_compare_gas_com_20 %>% filter(CUSTOMERCLASS == "Gas- Commercial"),
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "2020"
  ) %>%
  layout(
    xaxis = list(
      title = "Year - Month",
      fixedrange = T
    ),
    yaxis = list(
      title = "Consumption in kBtu",
      fixedrange = T
    ),
    barmode = "stack",
    title = 'Comparison in Commercial Gas Consumption',
    colorway = c('gold1','goldenrod1','gold3','gold4')
  ) %>% 
  config(displayModeBar = F)

The Commercial Gas consumption globally increases between 2017 and 2019. Therefore, I will only compare the consumption in 2020 with the data from 2019 to have a proper interpretation of the impact of the pandemic.

The difference between March 2019 and March 2020 is very low (only -0.245 BkBtu), and slightly more important in the months of April and June (-0.513 and -0.514 BkBtu). However, there is a huge gap in May (-1.443 BkBtu). This can be explained by the closings of shopping facilities, restaurants and office buildings. However, it does not completely balance with the increasing in Residential Gas consumption. In fact, a third of gas consumption is for industrial use according to the US Energy Information Administration. Even if industrial activity was limited during the pandemic, it was not totally shutdown. Furthermore, heating an office building is less energy consuming per capita compared to house heating. The gas consumption for heating is therefore increased.

Conclusion

Let’s conclude on the impact of COVID-19 on energy consumption in the Bay Area:

pge_compare_final_gas_com <-
  rbind(pge_compare_gas_com_17,pge_compare_gas_com_18,pge_compare_gas_com_19,pge_compare_gas_com_20)

pge_compare_final_gas_res <-
  rbind(pge_compare_gas_res_17,pge_compare_gas_res_18,pge_compare_gas_res_19,pge_compare_gas_res_20)

pge_compare_final_elec_res <-
  rbind(pge_compare_elec_res_17,pge_compare_elec_res_18,pge_compare_elec_res_19,pge_compare_elec_res_20)

pge_compare_final_elec_com <-
  rbind(pge_compare_elec_com_17,pge_compare_elec_com_18,pge_compare_elec_com_19,pge_compare_elec_com_20)

plot_ly() %>% 
  add_trace(
    data = pge_compare_final_elec_res,
    x = ~DATE %>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "Elec- Residential"
  ) %>% 
  add_trace(
    data = pge_compare_final_elec_com,
    x = ~DATE%>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "Elec- Commercial"
  ) %>% 
  add_trace(
    data = pge_compare_final_gas_res,
    x = ~DATE%>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "Gas- Residential"
  ) %>% 
  add_trace(
    data = pge_compare_final_gas_com,
    x = ~DATE%>% factor(),
    y = ~TOTALkBTU,
    type = "bar",
    name = "Gas- Commercial"
  ) %>% 
  layout(
    xaxis = list(
      title = "Year - Month",
      fixedrange = T
    ),
    yaxis = list(
      title = "Consumption in kBtu",
      fixedrange = T
    ),
    barmode = "stack",
    legend = list(title = list(text = "Electricity Type")),
    title = 'Comparison of Residential and Commercial Electricity \n and Gas Consumption for the Bay Area',
    colorway = c('darkgreen','darkseagreen4','darkgoldenrod4','darkgoldenrod1')
  ) %>% 
  config(displayModeBar = F)

Except for the month of April, the global consumption stays even compared to previous years. In fact, the pandemic had a noticeable impact on the repartition of the sources of energy. The commercial use of electricity and gas reduced while the residential greatly increased, especially for gas. House heating surely was the first factor implied in these variations. In terms of policies, I think we can assume that the global lockdown in the Bay Area was what most impacted the energy consumption.

Maps

Data extraction and manipulation

To create the maps, I will use the data from the first two quarters of 2020 and 2019, then compute the difference in consumption for both the Residential and Commercial Electricity consumption:

pge_elec_COVID_res_20 <-
  rbind(pge_20_q1_elec,
        pge_20_q2_elec
    )%>%
  filter(
    CUSTOMERCLASS %in%
      c("Elec- Residential")
    )%>%
  select(
    !c(COMBINED, AVERAGEKWH)
  ) %>%
  group_by(ZIPCODE) %>%
  summarize(
    TOTALKWH =
      sum(
        TOTALKWH,
        na.rm = T
      )
  )

pge_elec_COVID_res_19 <-
  rbind(pge_19_q1_elec,
        pge_19_q2_elec
    )%>%
  filter(
    CUSTOMERCLASS %in%
      c("Elec- Residential")
    )%>%
  select(
    !c(COMBINED, AVERAGEKWH)
  ) %>%
  group_by(ZIPCODE) %>%
  summarize(
    TOTALKWH =
      sum(
        TOTALKWH,
        na.rm = T
      )
  )

TOTALKWH20 = as.data.frame(pge_elec_COVID_res_20[1:864,2])
TOTALKWH19 = as.data.frame(pge_elec_COVID_res_19[1:864,2])
DIFFKWH = TOTALKWH20 - TOTALKWH19

pge_elec_COVID_res <- 
  cbind(as.data.frame(pge_elec_COVID_res_20[1:864,1]),DIFFKWH)%>%
  rename(DIFFKWH=TOTALKWH)


pge_elec_COVID_com_20 <-
  rbind(pge_20_q1_elec,
        pge_20_q2_elec
    )%>%
  filter(
    CUSTOMERCLASS %in%
      c("Elec- Commercial")
    )%>%
  select(
    !c(COMBINED, AVERAGEKWH)
  ) %>%
  group_by(ZIPCODE) %>%
  summarize(
    TOTALKWH =
      sum(
        TOTALKWH,
        na.rm = T
      )
  )

pge_elec_COVID_com_19 <-
  rbind(pge_19_q1_elec,
        pge_19_q2_elec
    )%>%
  filter(
    CUSTOMERCLASS %in%
      c("Elec- Commercial")
    )%>%
  select(
    !c(COMBINED, AVERAGEKWH)
  ) %>%
  group_by(ZIPCODE) %>%
  summarize(
    TOTALKWH =
      sum(
        TOTALKWH,
        na.rm = T
      )
  )

COMTOTALKWH20 = as.data.frame(pge_elec_COVID_com_20[1:864,2])
COMTOTALKWH19 = as.data.frame(pge_elec_COVID_com_19[1:864,2])
COMDIFFKWH = COMTOTALKWH20 - COMTOTALKWH19

pge_elec_COVID_com <- 
  cbind(as.data.frame(pge_elec_COVID_com_20[1:864,1]),COMDIFFKWH)%>%
  rename(DIFFKWH=TOTALKWH)

Difference in Residential Electricity consumption

Using the leaflet() function, I can plot the map showing the difference in Residential Electricity consumption for the period of time I chose to study. In order to make the map as clear as possible, I set a distinguishable color palette with the colorNumeric() function. It is important to notice that a manipulation is required to set the value 0 as the middle value of the palette. To achieve that, I set a domain corresponding to the range(max(pge_elec_COVID_res_map$DIFFKWH, na.rm = TRUE),-max(pge_elec_COVID_res_map$DIFFKWH, na.rm = TRUE)) instead of only the original values of the differences. To deal wit NA values, I use the na.omit() function for the data values int the addPolygons() and addLegend() arguments of the leaflet() method.

ca_counties <- counties("CA", cb = T, progress_bar = F)
bay_county_names <-
  c(
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma"
  )
bay_counties <-
  ca_counties %>%
  filter(NAME %in% bay_county_names)

ca_cities <- places("CA", cb = T, progress_bar = FALSE)
bay_cities <- ca_cities[bay_counties, ]
bay_cities_within <-
  ca_cities %>% 
  st_centroid() %>% 
  .[bay_counties, ] %>% 
  st_set_geometry(NULL) %>% 
  left_join(ca_cities %>% select(GEOID)) %>% 
  st_as_sf()

bay_cities_within <-
  ca_cities[which(ca_cities$GEOID %in% st_centroid(ca_cities)[bay_counties, ]$GEOID), ]

usa_zips <- 
  zctas(cb = T, progress_bar = F)

bay_zips <-
  usa_zips %>% 
  st_centroid() %>% 
  .[bay_counties, ] %>% 
  st_set_geometry(NULL) %>% 
  left_join(usa_zips %>% select(GEOID10)) %>% 
  st_as_sf()

pge_elec_COVID_res_map <-
  pge_elec_COVID_res %>%
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>%
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  ) %>% 
  st_as_sf() %>% 
  st_transform(4326)

saveRDS(pge_elec_COVID_res_map, "pge_elec_COVID_res_map.rds")

res_pal <- colorNumeric(
  palette = c('blue','white',"brown"),
  domain = 
    range(max(pge_elec_COVID_res_map$DIFFKWH, na.rm = TRUE),-max(pge_elec_COVID_res_map$DIFFKWH, na.rm = TRUE))
)

leaflet() %>% 
  addTiles() %>% 
  addPolygons(
    data = na.omit(pge_elec_COVID_res_map),
    fillColor = ~res_pal(DIFFKWH),
    color = "white",
    opacity = 0.7,
    fillOpacity = 0.6,
    weight = 1,
    label = ~paste0(
      round(DIFFKWH), 
      " kWh total"
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(
    data = na.omit(pge_elec_COVID_res_map),
    pal = res_pal,
    values = ~DIFFKWH,
    title = "Total Residential Consumption Difference<br>kWh<br>First semester of 2020<br>vs First semester of 2019"
  )

At first sight, it is difficult to distinguish a geographical pattern. The differences are either negative or positive for two neighboring geographic zones. This is why I focused my attention on the internal features of each zone. It helped me distinguish two global trends:

  • The most populated zones showed an increased consumption compared to 2019, while it decreased in the sparsely populated ones
  • The Residential Electricity consumption in the zones on coasts generally decreased

The first one is directly explained by the lockdown policies and I already dealt with it in the first part of the assignment. The second one brings a new perspective to the global analysis. The coastal activity is mainly related to freight and international trade. Coastal houses are therefore usually worker’s housing that were not inhabited during the pandemic due to the international trades limitations.

However, a important flaw can be noticed in this method of analysis: a high value might not be related to a much higher consumption. In fact, a difference in population can explain such disparities. Therefore, I will plot the map showing the differences in Residential Electricity consumption in the Bay Area in percentages. The inherent problem of this new method is the potential infinite values that could appear if no consumption was reported in 2019. To deal with that, I use the IDPmisc library to be able to use NaRV.omit() function. The only limitation to this new method is the handling of the infinite/NA values when it comes to plotting, because the NaRV.omit() does not work in the leaflet() environment. This is why I can only use the na.omit() function in leaflet(), and therefore remove the extreme values only in the legend.

TOTALKWH20 = as.data.frame(pge_elec_COVID_res_20[1:864,2])
TOTALKWH19 = as.data.frame(pge_elec_COVID_res_19[1:864,2])
DIFFKWH_per = ((TOTALKWH20 - TOTALKWH19)/TOTALKWH19)*100

library(IDPmisc)

pge_elec_COVID_res_per <- 
  cbind(as.data.frame(pge_elec_COVID_res_20[1:864,1]),DIFFKWH_per)%>%
  rename(DIFFKWH_per=TOTALKWH)


pge_elec_COVID_res_map_per <-
  pge_elec_COVID_res_per %>%
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>%
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  ) %>% 
  st_as_sf() %>% 
  st_transform(4326)

saveRDS(pge_elec_COVID_res_map_per, "pge_elec_COVID_res_map_per.rds")

res_pal <- colorNumeric(
  palette = c('blue','white',"brown"),
  domain = 
    range((max(NaRV.omit(pge_elec_COVID_res_map_per$DIFFKWH_per))),-max(NaRV.omit(pge_elec_COVID_res_map_per$DIFFKWH_per)))
  )

leaflet() %>% 
  addTiles() %>% 
  addPolygons(
    data = na.omit(pge_elec_COVID_res_map_per),
    fillColor = ~res_pal(pge_elec_COVID_res_map_per$DIFFKWH_per),
    color = "white",
    opacity = 0.7,
    fillOpacity = 0.8,
    weight = 1,
    label = ~paste0(
      round(pge_elec_COVID_res_map_per$DIFFKWH_per), 
      " % "
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(
    data = na.omit(pge_elec_COVID_res_map_per),
    pal = res_pal,
    values = ~NaRV.omit(pge_elec_COVID_res_map_per$DIFFKWH_per),
    title = "Residential Consumption Difference<br>in percentage<br>First semester of 2020<br>vs First semester of 2019"
  )

This new map clearly shows that the Residential Electricity consumption in the Bay Area globally increased during the COVID pandemic compared to last year. It indeed decreased on the coasts and in the sparsely populated areas, but it is very negligible compared to the peak consumption in the big cities and the very populated districts.

Difference in Commercial Electricity consumption

Let’s plot the map showing the difference in Commercial Electricity consumption for the same period of time. Because the range of value is much wider than for the previous map (absolute variation of approximately 500 MkWh against approximately 120 MkWh), I had to work on my plot settings to increase the contrasts between whiter zones. To do that, I changed the border colors to black and increased their opacity (color = "black" and opacity = 0.9). I also slightly increased the fillOpacity:

ca_counties <- counties("CA", cb = T, progress_bar = F)
bay_county_names <-
  c(
    "Alameda",
    "Contra Costa",
    "Marin",
    "Napa",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
    "Solano",
    "Sonoma"
  )
bay_counties <-
  ca_counties %>%
  filter(NAME %in% bay_county_names)

ca_cities <- places("CA", cb = T, progress_bar = FALSE)
bay_cities <- ca_cities[bay_counties, ]
bay_cities_within <-
  ca_cities %>% 
  st_centroid() %>% 
  .[bay_counties, ] %>% 
  st_set_geometry(NULL) %>% 
  left_join(ca_cities %>% select(GEOID)) %>% 
  st_as_sf()

bay_cities_within <-
  ca_cities[which(ca_cities$GEOID %in% st_centroid(ca_cities)[bay_counties, ]$GEOID), ]

usa_zips <- 
  zctas(cb = T, progress_bar = F)

bay_zips <-
  usa_zips %>% 
  st_centroid() %>% 
  .[bay_counties, ] %>% 
  st_set_geometry(NULL) %>% 
  left_join(usa_zips %>% select(GEOID10)) %>% 
  st_as_sf()

pge_elec_COVID_com_map <-
  pge_elec_COVID_com %>%
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>%
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  ) %>% 
  st_as_sf() %>% 
  st_transform(4326)

saveRDS(pge_elec_COVID_com_map, "pge_elec_COVID_res_com.rds")

res_pal <- colorNumeric(
  palette = c('blue','white',"brown"),
  domain = 
    range(max(pge_elec_COVID_com_map$DIFFKWH, na.rm = TRUE),-max(pge_elec_COVID_com_map$DIFFKWH, na.rm = TRUE))
)

leaflet() %>% 
  addTiles() %>% 
  addPolygons(
    data = na.omit(pge_elec_COVID_com_map),
    fillColor = ~res_pal(DIFFKWH),
    color = "black",
    opacity = 0.9,
    fillOpacity = 0.75,
    weight = 1,
    label = ~paste0(
      round(DIFFKWH), 
      " kWh total in ",
      ZIPCODE
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(
    data = na.omit(pge_elec_COVID_com_map),
    pal = res_pal,
    values = ~DIFFKWH,
    title = "Total Commercial Consumption Difference<br>kWh<br>First semester of 2020<br>vs First semester of 2019"
  )

We can notice two important increases in Commercial Electricity consumption in two regions: West Cupertino and South West Hayvard. Observing their surrounding areas leads to a better understanding of the global phenomena. In fact, regions where the consumption increased compared to last year are surrounded with regions where it decreased. This can be interpreted as a concentration of the economic activity within the cities. As mentioned before, lockdown led to a temporary closing of office buildings. The suburban commercial activity had then to pause for the benefit of the downtown commercial activity. For example, the restaurants close to office buildings usually serving regulars at noon had to close while the local restaurants near the residential areas went through a peak period.

For the same reasons as for the Residential Electricity consumption, I will plot the difference in Commercial Electricity consumption in percentages to have a better perspective on the impacts of COVID-19 on energy consumption in the Bay Area:

COMTOTALKWH20 = as.data.frame(pge_elec_COVID_com_20[1:864,2])
COMTOTALKWH19 = as.data.frame(pge_elec_COVID_com_19[1:864,2])
COMDIFFKWH_per = ((COMTOTALKWH20 - COMTOTALKWH19)/COMTOTALKWH19)*100

pge_elec_COVID_com_per <- 
  cbind(as.data.frame(pge_elec_COVID_com_20[1:864,1]),COMDIFFKWH_per)%>%
  rename(DIFFKWH_per=TOTALKWH)

pge_elec_COVID_com_map_per <-
  pge_elec_COVID_com_per %>%
  mutate(
    ZIPCODE = ZIPCODE %>% as.character()
  ) %>%
  right_join(
    bay_zips %>% select(GEOID10),
    by = c("ZIPCODE" = "GEOID10")
  ) %>% 
  st_as_sf() %>% 
  st_transform(4326)

saveRDS(pge_elec_COVID_com_map_per, "pge_elec_COVID_res_com_per.rds")

res_pal <- colorNumeric(
  palette = c('blue','white',"brown"),
  domain = 
    range(max(NaRV.omit(pge_elec_COVID_com_map_per$DIFFKWH_per)),-max(NaRV.omit(pge_elec_COVID_com_map_per$DIFFKWH_per)))
)

leaflet() %>% 
  addTiles() %>% 
  addPolygons(
    data = na.omit(pge_elec_COVID_com_map_per),
    fillColor = ~res_pal(pge_elec_COVID_com_map_per$DIFFKWH_per),
    color = "white",
    opacity = 0.9,
    fillOpacity = 0.6,
    weight = 1,
    label = ~paste0(
      round(pge_elec_COVID_com_map_per$DIFFKWH_per), 
      " % "
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(
    data = na.omit(pge_elec_COVID_com_map_per),
    pal = res_pal,
    values = ~NaRV.omit(pge_elec_COVID_com_map_per$DIFFKWH_per),
    title = "Commercial Consumption Difference<br>in percentage<br>First semester of 2020<br>vs First semester of 2019"
  )

A very high percentage near Acalanes Ridge/Walnut Creek does not allow a great visualization of the nuances. This is why I remove the row related to this data.

pge_elec_COVID_com_map_per<-pge_elec_COVID_com_map_per[-c(281),]
res_pal <- colorNumeric(
  palette = c('blue','white',"brown"),
  domain = 
    range(max(NaRV.omit(pge_elec_COVID_com_map_per$DIFFKWH_per)),-max(NaRV.omit(pge_elec_COVID_com_map_per$DIFFKWH_per)))
)

leaflet() %>% 
  addTiles() %>% 
  addPolygons(
    data = na.omit(pge_elec_COVID_com_map_per),
    fillColor = ~res_pal(pge_elec_COVID_com_map_per$DIFFKWH_per),
    color = "white",
    opacity = 0.9,
    fillOpacity = 0.65,
    weight = 1,
    label = ~paste0(
      round(pge_elec_COVID_com_map_per$DIFFKWH_per), 
      " % "
    ),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(
    data = na.omit(pge_elec_COVID_com_map_per),
    pal = res_pal,
    values = ~NaRV.omit(pge_elec_COVID_com_map_per$DIFFKWH_per),
    title = "Commercial Consumption Difference<br>in percentage<br>First semester of 2020<br>vs First semester of 2019"
  )

Despite this modification, the map is still hardly interpretable because of the grayish appearance of NA/Inf values very close to the low negative percentages. However, this also means that the positive values are much higher, especially within the cities as mentioned before (Hayward, Fremont and Fairfield).Therefore, this increasing in Commercial Electricity consumption clearly stands out from the slight differences in the other areas. These cities usually consist of huge residential buildings with a low rent price inhabited by workers who commute to more active cities (San Francisco, Oakland…). For reasons mentioned before, local economic activities flourished from the lockdown, leading to a high increase in Commercial Electricity consumption.

Conclusion

While the variations of Residential Electricity consumption are quite spread out in the Bay Area, the variations of Commercial Electricity consumption are much more concentrated. This highlights the uneven repartition of the economic activity in the Bay, as a major part of the Bay showed no variations of electricity consumption linked to commercial activities. Two outcomes can be possible; either there is very few commercial activities in these regions, or the activity remained the same between 2019 and 2020. Because of COVID-19 and state regulations, the first outcome seems more plausible.

I find it interesting to highlight how the impact of COVID-19 on the global repartition of energy consumption in a region such as a Bay Area helped us understand and analyze deeper issues, such as the repartition of the economic activity or the regional land use. The pandemic could therefore be used as a source of data to analyze repartition patterns and help build policies that can improve the wealth repartition standard of living in the Bay, while optimizing the physical and cash flows.